from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
from urllib import request
import requests
import re

#word=input("请输入搜索的关键词:")
from selenium import webdriver
from selenium.webdriver.chrome.options import Options



# HTTP request headers used by the requests-based redirect resolution below.
# Fix: the original keys contained stray spaces ("Accept - Encoding",
# "Accept - Language") and the value "Keep - Alive" — invalid HTTP header
# names/values that servers silently ignore.
headers = {
    "Accept": "text/html, application/xhtml+xml, image/jxr, */*",
    # "br" deliberately omitted: decoding Brotli responses with requests
    # needs the optional brotli package, which this script does not use.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN",
    "Connection": "Keep-Alive",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
    "referer": "sogou.com",
}

# Search keywords (company name plus related complaint phrases) to query on Sogou.
keyword = ['江苏百瑞赢证券咨询有限公司', '信易赢', '江苏百瑞赢骗子', '百瑞赢诈骗', '百瑞赢骗局', '信易赢骗局', '百瑞赢退费']

# Crawl Sogou search results (pages 1-5) for every keyword, resolve each
# result's real target URL, and append "title<TAB>url" lines to
# ./txt/<keyword>.txt.
#
# Fixes over the original:
#   * removed the leftover `print(data); exit(-1)` debug exit that killed
#     the script after the first fetched page and made all parsing code dead;
#   * the headless Chrome driver is created once (options are loop-invariant)
#     and always quit via try/finally — the original created one driver per
#     page and never closed any of them;
#   * result files are opened with an explicit UTF-8 encoding so the Chinese
#     titles are written consistently on any platform.

# Build the headless-Chrome options once.
chrome_options = Options()
# Force a Chinese locale so result pages render in Chinese.
chrome_options.add_argument('lang=zh_CN.UTF-8')
# Spoof a mobile user agent.
chrome_options.add_argument('user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"')
chrome_options.add_argument('--headless')

# `chrome_options=` is deprecated and removed in Selenium 4;
# `options=` is accepted from Selenium 3.8 onward.
driver = webdriver.Chrome(options=chrome_options)
try:
    for ks in keyword:
        for mypage in range(1, 6):
            # Sogou omits the page parameter for the first results page.
            page_str = '' if mypage == 1 else '&page=%s' % mypage
            url = "https://www.sogou.com/web?query=" + ks + page_str

            # Fetch the rendered page through headless Selenium (plain
            # requests gets blocked/obfuscated markup from Sogou).
            driver.get(url)
            data = driver.page_source

            soup = BeautifulSoup(data, 'html.parser')

            # Each organic result title lives in an <h3><a> pair.
            for result_table in soup.find_all('h3'):
                try:
                    a_click = result_table.find("a")
                    table = a_click.get_text()
                    myurl = a_click.get("href")
                    print(table)

                    # Sogou wraps most result links in a relative redirect
                    # URL; resolve it back to the original site address.
                    source_url = ''
                    if myurl.find('http') != -1:
                        source_url = myurl
                    else:
                        url_ = str('http://www.sogou.com' + myurl)
                        print(url_)
                        sougou_content = requests.get(url=url_, headers=headers, allow_redirects=True)
                        # The redirect page embeds the target inside
                        # window.location.replace("...").
                        searchObj = re.findall(r'replace\(\"(.*)\"\)', sougou_content.text, re.I)
                        if len(searchObj) > 0:
                            source_url = searchObj[0]
                    print('原网址:%s' % source_url)

                    file = r'./txt/%s.txt' % ks
                    with open(file, 'a+', encoding='utf-8') as f:
                        f.write('%s\t%s\n' % (table, source_url))

                except Exception as e:
                    # A malformed result entry must not abort the crawl;
                    # report it and continue with the next <h3>.
                    print(str(e))
finally:
    # Always release the browser, even if the crawl raises.
    driver.quit()


